In this document, I explore 4 approaches to SVD with Dimkid data:
- No mean-centering, characters as rows and capacities as columns
- No mean-centering, capacities as rows and characters as columns
- Mean-centering by capacity, characters as rows and capacities as columns (the classic approach)
- Mean-centering by character, capacities as rows and characters as columns
Approach #1
In Approach #1 we proceed without mean-centering, with characters as rows and capacities as columns.
Raw data matrix visualizations
Adults
In this study, 431 US adults rated 21 characters (between-subjects) on 40 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2017, PNAS, Study 4.)

7-9yo children
In this study, 123 7- to 9-year-old US children rated 9 characters (between-subjects) on 20 capacities (within subjects). (See Weisman, Dweck, & Markman, 2018, Proc. CogSci.)

4-6yo children
In this study, 124 4- to 6-year-old US children rated 9 characters (between-subjects) on 20 capacities (within subjects). (See Weisman, Dweck, & Markman, 2018, Proc. CogSci.)

Approach #2
In Approach #2 we proceed without mean-centering, with capacities as rows and characters as columns.
Raw data matrix visualizations
Adults
In this study, 431 US adults rated 21 characters (between-subjects) on 40 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2017, PNAS, Study 4.)

7-9yo children
In this study, 123 7- to 9-year-old US children rated 9 characters (between-subjects) on 20 capacities (within subjects). (See Weisman, Dweck, & Markman, 2018, Proc. CogSci.)

4-6yo children
In this study, 124 4- to 6-year-old US children rated 9 characters (between-subjects) on 20 capacities (within subjects). (See Weisman, Dweck, & Markman, 2018, Proc. CogSci.)

Approach #3
In Approach #3 we mean-center by capacity, with characters as rows and capacities as columns.
Raw data matrix visualizations
Adults
In this study, 431 US adults rated 21 characters (between-subjects) on 40 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2017, PNAS, Study 4.)

7-9yo children
In this study, 123 7- to 9-year-old US children rated 9 characters (between-subjects) on 20 capacities (within subjects). (See Weisman, Dweck, & Markman, 2018, Proc. CogSci.)

4-6yo children
In this study, 124 4- to 6-year-old US children rated 9 characters (between-subjects) on 20 capacities (within subjects). (See Weisman, Dweck, & Markman, 2018, Proc. CogSci.)

Approach #4
In Approach #4 we mean-center by character, with capacities as rows and characters as columns.
Raw data matrix visualizations
Adults
In this study, 431 US adults rated 21 characters (between-subjects) on 40 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2017, PNAS, Study 4.)

7-9yo children
In this study, 123 7- to 9-year-old US children rated 9 characters (between-subjects) on 20 capacities (within subjects). (See Weisman, Dweck, & Markman, 2018, Proc. CogSci.)

4-6yo children
In this study, 124 4- to 6-year-old US children rated 9 characters (between-subjects) on 20 capacities (within subjects). (See Weisman, Dweck, & Markman, 2018, Proc. CogSci.)

---
title: "Dimkid: Singular Value Decomposition (SVD)"
output:
  html_notebook:
    toc: true
    toc_float: true
---

```{r, include = F}
# Notebook-wide chunk defaults: do not echo source code and do not show
# messages emitted while chunks run.
knitr::opts_chunk$set(echo = FALSE, message = FALSE)
```

```{r setup, include = F}
library(tidyverse)
library(rms)
```

```{r data, include = F}
# NOTE(review): machine-specific absolute path; the relative "./data/..."
# paths below are resolved against it.
setwd("/Users/kweisman/Documents/Research (Stanford)/Projects/Dimkid_svd/dimkid_svd/")

# Ordering of the characters in each sample. It becomes the factor level
# order and, via arrange(), the row order of the tables below.
# (Wrap a vector in rev() to get the reverse ordering instead.)
character_levels_ad <- c("stapler", "car", "computer", "robot", "microbe",
                         "fish", "beetle", "blue_jay", "frog", "mouse",
                         "goat", "dog", "bear", "dolphin", "elephant",
                         "chimpanzee", "fetus", "pvs", "infant", "child",
                         "adult")
character_levels_ch <- c("computer", "robot", "doll", "teddy_bear",
                         "beetle", "bird", "mouse", "goat", 
                         "elephant")

# import data
# Adults: any label containing "vegetative" is shortened to "pvs" before the
# labels are turned into an ordered set of factor levels.
d_ad <- read.csv("./data/means_adults.csv") %>%
  mutate(X = ifelse(grepl("vegetative", X), "pvs", as.character(X))) %>%
  mutate(X = factor(X, levels = character_levels_ad)) %>%
  rename(character = X) %>%
  arrange(character)

# Children, 7-9y
d_79 <- read.csv("./data/means_79y.csv") %>%
  mutate(X = factor(X, levels = character_levels_ch)) %>%
  rename(character = X) %>%
  arrange(character)

# Children, 4-6y (same character set and ordering as the 7-9y sample)
d_46 <- read.csv("./data/means_46y.csv") %>%
  mutate(X = factor(X, levels = character_levels_ch)) %>%
  rename(character = X) %>%
  arrange(character)
```

```{r names, include = F}
# Label vectors used to name SVD output positionally: capacities are every
# column after the leading `character` column, characters are that column's
# factor levels.
capacity_names_ad <- colnames(d_ad)[-1]
character_names_ad <- levels(d_ad[["character"]])

# NOTE(review): the child labels are read from the 7-9y table only and are
# also applied to the 4-6y results; presumably both tables share the same
# columns and levels -- confirm.
capacity_names_ch <- colnames(d_79)[-1]
character_names_ch <- levels(d_79[["character"]])
```

```{r functions, include = F}
# Mean-center a character-by-capacity table of ratings.
#
# Args:
#   df:        data frame with a `character` column (one row per character)
#              and one numeric column per capacity.
#   by:        "capacity" subtracts each capacity's mean (taken over
#              characters) and returns characters as rows;
#              "character" subtracts each character's mean (taken over
#              capacities) and returns capacities as rows.
#   age_group: "adults" or "children". Validated but not otherwise needed;
#              kept so existing calls continue to work.
#
# Returns:
#   A data.frame. by = "capacity": `character` followed by one centered
#   column per capacity. by = "character": `capacity` (character vector)
#   followed by one centered column per character.
#   Characters are sorted (factor level order); capacities keep the column
#   order of `df`. Keeping the input order matters because downstream code
#   labels SVD output positionally with names(df)[-1]; sorting capacities
#   alphabetically here would silently misalign those labels.
mean_cent_fun <- function(df, 
                          by = c("capacity", "character"),
                          age_group = c("adults", "children")){
  
  # resolve the choice arguments to a single value (first choice if missing)
  by <- match.arg(by)
  age_group <- match.arg(age_group)
  
  if(anyDuplicated(df$character) > 0){
    stop("each character must appear in exactly one row of `df`",
         call. = FALSE)
  }
  
  capacity_cols <- setdiff(names(df), "character")
  row_order <- order(df$character)
  characters <- df$character[row_order]
  ratings <- as.matrix(df[row_order, capacity_cols, drop = FALSE])
  
  if(by == "capacity"){
    # subtract column (capacity) means
    centered <- sweep(ratings, 2, colMeans(ratings), "-")
    df_cent <- data.frame(character = characters,
                          centered,
                          stringsAsFactors = FALSE)
  } else {
    # subtract row (character) means, then flip so capacities are rows
    centered <- t(sweep(ratings, 1, rowMeans(ratings), "-"))
    colnames(centered) <- as.character(characters)
    df_cent <- data.frame(capacity = capacity_cols,
                          centered,
                          stringsAsFactors = FALSE)
  }
  
  # plain 1..n row names, independent of the matrix dimnames
  rownames(df_cent) <- NULL
  
  df_cent
}

# Reshape the left singular vectors (svd$u) into long format for plotting.
#
# Args:
#   svd:       list as returned by svd(), with components `d` and `u`.
#   age_group: "adults" or "children"; selects which label vectors to use.
#   transpose: FALSE if the decomposed matrix had characters as rows (rows of
#              U are labelled with character names); TRUE if it had
#              capacities as rows (rows of U are labelled with capacity names).
#   multiply:  if TRUE, scale each mode's vector by its singular value and
#              add the singular value as column `S`.
#
# Returns:
#   data.frame with columns `property` (factor), `mode` ("mode_01", ...),
#   `value`, and `S` when multiply = TRUE; rows are stacked mode by mode.
#
# Relies on the file-level label vectors character_names_* / capacity_names_*;
# labels are applied positionally, so they must be in the row order of the
# decomposed matrix.
U_fun <- function(svd,
                       age_group = c("adults", "children"),
                       transpose = c(FALSE, TRUE),
                       multiply = c(FALSE, TRUE)){

  # collapse the choice-vector defaults to a single value
  age_group <- match.arg(age_group)
  transpose <- as.logical(transpose[1])
  multiply <- as.logical(multiply[1])

  if(transpose){
    y_names <- if(age_group == "adults") capacity_names_ad else capacity_names_ch
  } else {
    y_names <- if(age_group == "adults") character_names_ad else character_names_ch
  }

  U <- svd$u
  n_row <- nrow(U)
  n_mode <- ncol(U)
  mode_idx <- rep(seq_len(n_mode), each = n_row)

  # as.vector() unrolls U column by column, i.e. mode by mode
  df_final <- data.frame(
    property = factor(rep(seq_len(n_row), times = n_mode), labels = y_names),
    mode = sprintf("mode_%02d", mode_idx),
    value = as.vector(U),
    stringsAsFactors = FALSE)

  if(multiply){
    # index the singular values directly by mode; any vectors beyond
    # length(svd$d) belong to zero singular values
    S_all <- c(svd$d, rep(0, max(0, n_mode - length(svd$d))))
    df_final$S <- S_all[mode_idx]
    df_final$value <- df_final$value * df_final$S
  }

  df_final
}

# Heatmap of the left singular vectors: modes on the x-axis, row labels of the
# decomposed matrix on the y-axis, fill = entry of U (optionally scaled by the
# singular value). Arguments are forwarded unchanged to U_fun().
plot_U_fun <- function(svd,
                       age_group = c("adults", "children"),
                       transpose = c(FALSE, TRUE),
                       multiply = c(FALSE, TRUE)){

  u_long <- U_fun(svd, age_group, transpose, multiply)

  # diverging palette for signed values, with a tall colour bar
  fill_scale <- scale_fill_distiller(palette = "RdBu", direction = -1,
                                     guide = guide_colorbar(barheight = 15))
  # vertical x-axis labels
  x_label_theme <- theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

  ggplot(u_long, aes(x = mode, y = property, fill = value)) +
    geom_tile(color = "black") +
    fill_scale +
    theme_minimal() +
    x_label_theme
}

# Heatmap of the diagonal matrix of singular values (svd$d): mode by mode,
# with the first mode at the top-left.
plot_S_fun <- function(svd){
  
  n_modes <- length(svd$d)
  
  # "3" -> "mode_03", "12" -> "mode_12"
  mode_label <- function(idx){
    ifelse(as.numeric(idx) < 10,
           paste0("mode_0", as.character(idx)),
           paste0("mode_", as.character(idx)))
  }
  
  s_long <- diag(x = svd$d, nrow = n_modes, ncol = n_modes) %>%
    data.frame() %>%
    rownames_to_column("modeA") %>%
    gather(modeB, value, -modeA) %>%
    # strip the "X" that data.frame() prepends to the column names
    mutate(modeA = mode_label(modeA),
           modeB = mode_label(gsub("X", "", modeB)))
  
  # sequential palette, tall colour bar
  fill_scale <- scale_fill_distiller(palette = "Reds", direction = 1,
                                     guide = guide_colorbar(barheight = 15))
  # vertical x-axis labels
  x_label_theme <- theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
  
  # the y-axis is reversed so that the diagonal runs top-left to bottom-right
  ggplot(s_long, aes(x = modeA, 
                     y = reorder(modeB, desc(modeB)), 
                     fill = value)) +
    geom_tile(color = "black") +
    fill_scale +
    theme_minimal() +
    x_label_theme
}

# Reshape the right singular vectors (svd$v) into long format for plotting.
#
# Args:
#   svd:       list as returned by svd(), with components `d` and `v`.
#   age_group: "adults" or "children"; selects which label vectors to use.
#   transpose: FALSE if the decomposed matrix had capacities as columns (rows
#              of V are labelled with capacity names); TRUE if it had
#              characters as columns (rows of V are labelled with character
#              names).
#   multiply:  if TRUE, scale each mode's vector by its singular value and
#              add the singular value as column `S`.
#
# Returns:
#   data.frame with columns `item` (factor), `mode` ("mode_01", ...),
#   `value`, and `S` when multiply = TRUE; rows are stacked mode by mode.
#
# Relies on the file-level label vectors character_names_* / capacity_names_*;
# labels are applied positionally, so they must be in the column order of the
# decomposed matrix.
V_fun <- function(svd,
                       age_group = c("adults", "children"),
                       transpose = c(FALSE, TRUE),
                       multiply = c(FALSE, TRUE)){

  # collapse the choice-vector defaults to a single value
  age_group <- match.arg(age_group)
  transpose <- as.logical(transpose[1])
  multiply <- as.logical(multiply[1])

  if(transpose){
    y_names <- if(age_group == "adults") character_names_ad else character_names_ch
  } else {
    y_names <- if(age_group == "adults") capacity_names_ad else capacity_names_ch
  }

  V <- svd$v
  n_row <- nrow(V)
  n_mode <- ncol(V)
  mode_idx <- rep(seq_len(n_mode), each = n_row)

  # as.vector() unrolls V column by column, i.e. mode by mode
  df_final <- data.frame(
    item = factor(rep(seq_len(n_row), times = n_mode), labels = y_names),
    mode = sprintf("mode_%02d", mode_idx),
    value = as.vector(V),
    stringsAsFactors = FALSE)

  if(multiply){
    # index the singular values directly by mode; any vectors beyond
    # length(svd$d) belong to zero singular values
    S_all <- c(svd$d, rep(0, max(0, n_mode - length(svd$d))))
    df_final$S <- S_all[mode_idx]
    df_final$value <- df_final$value * df_final$S
  }

  df_final
}

# Heatmap of the right singular vectors: modes on the x-axis, column labels of
# the decomposed matrix on the y-axis, fill = entry of V (optionally scaled by
# the singular value). Arguments are forwarded unchanged to V_fun().
plot_V_fun <- function(svd,
                       age_group = c("adults", "children"),
                       transpose = c(FALSE, TRUE),
                       multiply = c(FALSE, TRUE)){

  v_long <- V_fun(svd, age_group, transpose, multiply)

  # diverging palette for signed values, with a tall colour bar
  fill_scale <- scale_fill_distiller(palette = "RdBu", direction = -1,
                                     guide = guide_colorbar(barheight = 15))
  # vertical x-axis labels
  x_label_theme <- theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

  ggplot(v_long, aes(x = mode, y = item, fill = value)) +
    geom_tile(color = "black") +
    fill_scale +
    theme_minimal() +
    x_label_theme
}

# Rebuild the decomposed matrix from an SVD and return it in long format.
#
# The reconstruction is the sum over modes of d[i] * outer(u[, i], v[, i]),
# computed here as the equivalent matrix product U diag(d) V'. All modes
# available in `svd` are used; if fewer singular vectors than singular values
# were requested from svd(), the result is the corresponding low-rank
# approximation.
#
# Args:
#   svd:       list as returned by svd(), with components `d`, `u` and `v`.
#   age_group: "adults" or "children"; selects which label vectors to use.
#   transpose: FALSE if the decomposed matrix had characters as rows and
#              capacities as columns; TRUE for the transposed layout.
#
# Returns:
#   data.frame with columns `y` (row label, factor), `x` (column label,
#   factor) and `value`, stacked column by column.
#
# Relies on the file-level label vectors character_names_* / capacity_names_*;
# labels are applied positionally.
reconstruct_fun <- function(svd,
                            age_group = c("adults", "children"),
                            transpose = c(FALSE, TRUE)){

  # collapse the choice-vector defaults to a single value
  age_group <- match.arg(age_group)
  transpose <- as.logical(transpose[1])

  if(age_group == "adults"){
    capacity_names <- capacity_names_ad
    character_names <- character_names_ad
  } else {
    capacity_names <- capacity_names_ch
    character_names <- character_names_ch
  }
  
  if(transpose){
    x_names <- character_names
    y_names <- capacity_names
  } else {
    x_names <- capacity_names
    y_names <- character_names
  }
  
  # modes for which a singular value and both singular vectors are available
  modes <- seq_len(min(length(svd$d), ncol(svd$u), ncol(svd$v)))
  
  # d * t(V) scales row i of t(V) by d[i], so this is U diag(d) V'
  recon <- svd$u[, modes, drop = FALSE] %*%
    (svd$d[modes] * t(svd$v[, modes, drop = FALSE]))
  
  n_y <- nrow(recon)
  n_x <- ncol(recon)
  
  # as.vector() unrolls the matrix column by column
  OP_sum <- data.frame(
    y = factor(rep(seq_len(n_y), times = n_x), labels = y_names),
    x = factor(rep(seq_len(n_x), each = n_y), labels = x_names),
    value = as.vector(recon))
  
  OP_sum
}

# Heatmap of the matrix rebuilt from an SVD: column labels on the x-axis, row
# labels on the y-axis, fill = reconstructed value. Arguments are forwarded
# unchanged to reconstruct_fun().
plot_reconstruct_fun <- function(svd, 
                       age_group = c("adults", "children"),
                       transpose = c(FALSE, TRUE)){

  recon_long <- reconstruct_fun(svd, age_group, transpose)

  # diverging palette, tall colour bar
  fill_scale <- scale_fill_distiller(palette = "RdBu", direction = -1,
                                     guide = guide_colorbar(barheight = 15))
  # vertical x-axis labels
  x_label_theme <- theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

  ggplot(recon_long, aes(x = x, y = y, fill = value)) +
    geom_tile(color = "black") +
    fill_scale +
    theme_minimal() +
    x_label_theme
}
```


In this document, I explore 4 approaches to SVD with Dimkid data:

1. No mean-centering, characters as rows and capacities as columns
2. No mean-centering, capacities as rows and characters as columns
3. Mean-centering by capacity, characters as rows and capacities as columns (the classic approach)
4. Mean-centering by character, capacities as rows and characters as columns

# Approach #1

In Approach #1 we proceed without mean-centering, with characters as rows and capacities as columns.

## Raw data matrix visualizations {.tabset}

### Adults

In this study, 431 US adults rated 21 characters (between-subjects) on 40 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2017, _PNAS_, Study 4.)

```{r, fig.width = 4, fig.asp = 0.67}
# Heatmap of the adult ratings: capacities on the x-axis, characters on the
# y-axis; lighter tiles are higher values. Fill limits follow the data range.
ggplot(data = gather(d_ad, capacity, mean_rating, -character),
       mapping = aes(x = capacity, y = character, fill = mean_rating)) +
  labs(title = "Raw data: Adults",
       subtitle = "21 characters, 40 capacities",
       x = "Capacity",
       y = "Character",
       fill = "Mean rating") +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "black", high = "white",
                      guide = guide_colorbar(barheight = 15)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
```

### 7-9yo children

In this study, 123 7- to 9-year-old US children rated 9 characters (between-subjects) on 20 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2018, _Proc. CogSci_.)

```{r, fig.width = 4, fig.asp = 0.67}
# Heatmap of the 7-9y ratings: capacities on the x-axis, characters on the
# y-axis; lighter tiles are higher values. Fill limits follow the data range.
ggplot(data = gather(d_79, capacity, mean_rating, -character),
       mapping = aes(x = capacity, y = character, fill = mean_rating)) +
  labs(title = "Raw data: 7- to 9-year-old children",
       subtitle = "9 characters, 20 capacities",
       x = "Capacity",
       y = "Character",
       fill = "Mean rating") +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "black", high = "white",
                      guide = guide_colorbar(barheight = 15)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
```

### 4-6yo children

In this study, 124 4- to 6-year-old US children rated 9 characters (between-subjects) on 20 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2018, _Proc. CogSci_.)

```{r, fig.width = 4, fig.asp = 0.67}
# Heatmap of the 4-6y ratings: capacities on the x-axis, characters on the
# y-axis; lighter tiles are higher values. Fill limits follow the data range.
ggplot(data = gather(d_46, capacity, mean_rating, -character),
       mapping = aes(x = capacity, y = character, fill = mean_rating)) +
  labs(title = "Raw data: 4- to 6-year-old children",
       subtitle = "9 characters, 20 capacities",
       x = "Capacity",
       y = "Character",
       fill = "Mean rating") +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "black", high = "white",
                      guide = guide_colorbar(barheight = 15)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
```


## SVD {.tabset}

### Adults

```{r}
# Approach #1, adults: SVD of the uncentered table (characters as rows,
# capacities as columns), keeping nrow(d_ad) left and right singular vectors.
svd1_ad <- svd(x = column_to_rownames(d_ad, "character"),
               nu = nrow(d_ad),
               nv = nrow(d_ad))
```

```{r, fig.width = 4, fig.asp = 0.67}
# matrix rebuilt from the decomposition
plot_reconstruct_fun(svd = svd1_ad, age_group = "adults", transpose = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #1)",
       subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S")
```

```{r, fig.width = 3, fig.asp = 1}
# unscaled U; use multiply = TRUE (and an "U x S" subtitle) to scale each
# mode by its singular value
plot_U_fun(svd = svd1_ad, age_group = "adults",
           transpose = FALSE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #1)",
       subtitle = "U: Feature vectors (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
plot_S_fun(svd = svd1_ad) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #1)",
       subtitle = "S: Singular values (by mode)")
```

```{r, fig.width = 3, fig.asp = 1.49}
# unscaled V; use multiply = TRUE (and a "V x S" subtitle) to scale each
# mode by its singular value
plot_V_fun(svd = svd1_ad, age_group = "adults",
           transpose = FALSE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #1)",
       subtitle = "V: Item vectors (by mode)")
```

```{r}

```

### Children, 7-9y

```{r}
# Approach #1, children 7-9y: SVD of the uncentered table (characters as
# rows, capacities as columns), keeping nrow(d_79) singular vectors per side.
svd1_79 <- svd(x = column_to_rownames(d_79, "character"),
               nu = nrow(d_79),
               nv = nrow(d_79))
```

```{r, fig.width = 4, fig.asp = 0.67}
# matrix rebuilt from the decomposition
plot_reconstruct_fun(svd = svd1_79, age_group = "children", transpose = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #1)",
       subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S")
```

```{r, fig.width = 3, fig.asp = 1}
# unscaled U; use multiply = TRUE (and an "U x S" subtitle) to scale each
# mode by its singular value
plot_U_fun(svd = svd1_79, age_group = "children",
           transpose = FALSE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #1)",
       subtitle = "U: Feature vectors (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
plot_S_fun(svd = svd1_79) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #1)",
       subtitle = "S: Singular values (by mode)")
```

```{r, fig.width = 3, fig.asp = 1.49}
# unscaled V; use multiply = TRUE (and a "V x S" subtitle) to scale each
# mode by its singular value
plot_V_fun(svd = svd1_79, age_group = "children",
           transpose = FALSE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #1)",
       subtitle = "V: Item vectors (by mode)")
```

### Children, 4-6y

```{r}
# Approach #1, children 4-6y: SVD of the uncentered table (characters as
# rows, capacities as columns), keeping nrow(d_46) singular vectors per side.
svd1_46 <- svd(x = column_to_rownames(d_46, "character"),
               nu = nrow(d_46),
               nv = nrow(d_46))
```

```{r, fig.width = 4, fig.asp = 0.67}
# matrix rebuilt from the decomposition
plot_reconstruct_fun(svd = svd1_46, age_group = "children", transpose = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 4-6y (Approach #1)",
       subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S")
```

```{r, fig.width = 3, fig.asp = 1}
# unscaled U; use multiply = TRUE (and an "U x S" subtitle) to scale each
# mode by its singular value
plot_U_fun(svd = svd1_46, age_group = "children",
           transpose = FALSE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 4-6y (Approach #1)",
       subtitle = "U: Feature vectors (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
plot_S_fun(svd = svd1_46) +
  labs(x = "", y = "",
       title = "SVD: Children, 4-6y (Approach #1)",
       subtitle = "S: Singular values (by mode)")
```

```{r, fig.width = 3, fig.asp = 1.49}
# unscaled V; use multiply = TRUE (and a "V x S" subtitle) to scale each
# mode by its singular value
plot_V_fun(svd = svd1_46, age_group = "children",
           transpose = FALSE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 4-6y (Approach #1)",
       subtitle = "V: Item vectors (by mode)")
```


# Approach #2

In Approach #2 we proceed without mean-centering, with capacities as rows and characters as columns.

## Raw data matrix visualizations {.tabset}

### Adults

In this study, 431 US adults rated 21 characters (between-subjects) on 40 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2017, _PNAS_, Study 4.)

```{r, fig.width = 3, fig.asp = 1.79}
# Heatmap of the adult ratings in the transposed layout: characters on the
# x-axis, capacities on the y-axis; lighter tiles are higher values.
ggplot(data = gather(d_ad, capacity, mean_rating, -character),
       mapping = aes(x = character, y = capacity, fill = mean_rating)) +
  labs(title = "Raw data: Adults",
       subtitle = "21 characters, 40 capacities",
       x = "Character",
       y = "Capacity",
       fill = "Mean rating") +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "black", high = "white",
                      guide = guide_colorbar(barheight = 15)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
```

### 7-9yo children

In this study, 123 7- to 9-year-old US children rated 9 characters (between-subjects) on 20 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2018, _Proc. CogSci_.)

```{r, fig.width = 3, fig.asp = 1.49}
# Heatmap of the 7-9y ratings in the transposed layout: characters on the
# x-axis, capacities on the y-axis; lighter tiles are higher values.
ggplot(data = gather(d_79, capacity, mean_rating, -character),
       mapping = aes(x = character, y = capacity, fill = mean_rating)) +
  labs(title = "Raw data: 7- to 9-year-old children",
       subtitle = "9 characters, 20 capacities",
       x = "Character",
       y = "Capacity",
       fill = "Mean rating") +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "black", high = "white",
                      guide = guide_colorbar(barheight = 15)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
```

### 4-6yo children

In this study, 124 4- to 6-year-old US children rated 9 characters (between-subjects) on 20 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2018, _Proc. CogSci_.)

```{r, fig.width = 3, fig.asp = 1.49}
# Heatmap of the 4-6y ratings in the transposed layout: characters on the
# x-axis, capacities on the y-axis; lighter tiles are higher values.
ggplot(data = gather(d_46, capacity, mean_rating, -character),
       mapping = aes(x = character, y = capacity, fill = mean_rating)) +
  labs(title = "Raw data: 4- to 6-year-old children",
       subtitle = "9 characters, 20 capacities",
       x = "Character",
       y = "Capacity",
       fill = "Mean rating") +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "black", high = "white",
                      guide = guide_colorbar(barheight = 15)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
```


## SVD {.tabset}

### Adults

```{r}
# Approach #2, adults: SVD of the transposed, uncentered table (capacities as
# rows, characters as columns). nu/nv are nrow(d_ad), i.e. the number of
# characters.
svd2_ad <- svd(x = t(column_to_rownames(d_ad, "character")),
               nu = nrow(d_ad),
               nv = nrow(d_ad))
```

```{r, fig.width = 3, fig.asp = 1.49}
# matrix rebuilt from the decomposition
plot_reconstruct_fun(svd = svd2_ad, age_group = "adults", transpose = TRUE) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #2)",
       subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S")
```

```{r, fig.width = 3, fig.asp = 1.49}
# unscaled U; use multiply = TRUE (and an "U x S" subtitle) to scale each
# mode by its singular value
plot_U_fun(svd = svd2_ad, age_group = "adults",
           transpose = TRUE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #2)",
       subtitle = "U: Feature vectors (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
plot_S_fun(svd = svd2_ad) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #2)",
       subtitle = "S: Singular values (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
# unscaled V; use multiply = TRUE (and a "V x S" subtitle) to scale each
# mode by its singular value
plot_V_fun(svd = svd2_ad, age_group = "adults",
           transpose = TRUE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #2)",
       subtitle = "V: Item vectors (by mode)")
```

### Children, 7-9y

```{r}
# Approach #2, children 7-9y: SVD of the transposed, uncentered table
# (capacities as rows, characters as columns). nu/nv are nrow(d_79), i.e. the
# number of characters.
svd2_79 <- svd(x = t(column_to_rownames(d_79, "character")),
               nu = nrow(d_79),
               nv = nrow(d_79))
```

```{r, fig.width = 3, fig.asp = 1.49}
# matrix rebuilt from the decomposition
plot_reconstruct_fun(svd = svd2_79, age_group = "children", transpose = TRUE) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #2)",
       subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S")
```

```{r, fig.width = 3, fig.asp = 1.49}
# unscaled U; use multiply = TRUE (and an "U x S" subtitle) to scale each
# mode by its singular value
plot_U_fun(svd = svd2_79, age_group = "children",
           transpose = TRUE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #2)",
       subtitle = "U: Feature vectors (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
plot_S_fun(svd = svd2_79) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #2)",
       subtitle = "S: Singular values (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
# unscaled V; use multiply = TRUE (and a "V x S" subtitle) to scale each
# mode by its singular value
plot_V_fun(svd = svd2_79, age_group = "children",
           transpose = TRUE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #2)",
       subtitle = "V: Item vectors (by mode)")
```

### Children, 4-6y

```{r}
# Approach #2, children 4-6y: SVD of the transposed, uncentered table
# (capacities as rows, characters as columns). nu/nv are nrow(d_46), i.e. the
# number of characters.
svd2_46 <- svd(x = t(column_to_rownames(d_46, "character")),
               nu = nrow(d_46),
               nv = nrow(d_46))
```

```{r, fig.width = 3, fig.asp = 1.49}
# matrix rebuilt from the decomposition
plot_reconstruct_fun(svd = svd2_46, age_group = "children", transpose = TRUE) +
  labs(x = "", y = "",
       title = "SVD: Children, 4-6y (Approach #2)",
       subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S")
```

```{r, fig.width = 3, fig.asp = 1.49}
# unscaled U; use multiply = TRUE (and an "U x S" subtitle) to scale each
# mode by its singular value
plot_U_fun(svd = svd2_46, age_group = "children",
           transpose = TRUE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 4-6y (Approach #2)",
       subtitle = "U: Feature vectors (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
plot_S_fun(svd = svd2_46) +
  labs(x = "", y = "",
       title = "SVD: Children, 4-6y (Approach #2)",
       subtitle = "S: Singular values (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
# unscaled V; use multiply = TRUE (and a "V x S" subtitle) to scale each
# mode by its singular value
plot_V_fun(svd = svd2_46, age_group = "children",
           transpose = TRUE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 4-6y (Approach #2)",
       subtitle = "V: Item vectors (by mode)")
```


# Approach #3

In Approach #3 we mean-center by capacity, with characters as rows and capacities as columns.

## Mean-centered data matrix visualizations {.tabset}

### Adults

In this study, 431 US adults rated 21 characters (between-subjects) on 40 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2017, _PNAS_, Study 4.)

```{r, fig.width = 4, fig.asp = 0.67}
# Heatmap of the adult ratings after mean-centering each capacity (column).
# The title and legend say "mean-centered" because the plotted values are
# deviations from the capacity mean, not the raw mean ratings.
d_ad %>%
  mean_cent_fun(by = "capacity", age_group = "adults") %>%
  gather(capacity, mean_cent, -character) %>%
  ggplot(aes(x = capacity, y = character, fill = mean_cent)) +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "black", high = "white", # limits = c(-6, 6),
                      guide = guide_colorbar(barheight = 15)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(title = "Mean-centered data (by capacity): Adults",
       subtitle = "21 characters, 40 capacities",
       x = "Capacity",
       y = "Character",
       fill = "Mean-centered\nrating")
```

### 7-9yo children

In this study, 123 7- to 9-year-old US children rated 9 characters (between-subjects) on 20 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2018, _Proc. CogSci_.)

```{r, fig.width = 4, fig.asp = 0.67}
# Heatmap of the 7-9y ratings after mean-centering each capacity (column).
# The title and legend say "mean-centered" because the plotted values are
# deviations from the capacity mean, not the raw mean ratings.
d_79 %>%
  mean_cent_fun(by = "capacity", age_group = "children") %>%
  gather(capacity, mean_cent, -character) %>%
  ggplot(aes(x = capacity, y = character, fill = mean_cent)) +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "black", high = "white", # limits = c(-2, 2),
                      guide = guide_colorbar(barheight = 15)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(title = "Mean-centered data (by capacity): 7- to 9-year-old children",
       subtitle = "9 characters, 20 capacities",
       x = "Capacity",
       y = "Character",
       fill = "Mean-centered\nrating")
```

### 4-6yo children

In this study, 124 4- to 6-year-old US children rated 9 characters (between-subjects) on 20 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2018, _Proc. CogSci_.)

```{r, fig.width = 4, fig.asp = 0.67}
# Heatmap of the 4-6y ratings after mean-centering each capacity (column).
# The title and legend say "mean-centered" because the plotted values are
# deviations from the capacity mean, not the raw mean ratings.
d_46 %>%
  mean_cent_fun(by = "capacity", age_group = "children") %>%
  gather(capacity, mean_cent, -character) %>%
  ggplot(aes(x = capacity, y = character, fill = mean_cent)) +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "black", high = "white", # limits = c(-2, 2),
                      guide = guide_colorbar(barheight = 15)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(title = "Mean-centered data (by capacity): 4- to 6-year-old children",
       subtitle = "9 characters, 20 capacities",
       x = "Capacity",
       y = "Character",
       fill = "Mean-centered\nrating")
```


## SVD {.tabset}

### Adults

```{r}
# Approach #3, adults: SVD of the table mean-centered by capacity (characters
# as rows, capacities as columns), keeping nrow(d_ad) singular vectors per
# side.
svd3_ad <- svd(x = column_to_rownames(mean_cent_fun(d_ad, "capacity",
                                                    age_group = "adults"),
                                      "character"),
               nu = nrow(d_ad),
               nv = nrow(d_ad))
```

```{r, fig.width = 4, fig.asp = 0.67}
# matrix rebuilt from the decomposition
plot_reconstruct_fun(svd = svd3_ad, age_group = "adults", transpose = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #3)",
       subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S")
```

```{r, fig.width = 3, fig.asp = 1}
# unscaled U; use multiply = TRUE (and an "U x S" subtitle) to scale each
# mode by its singular value
plot_U_fun(svd = svd3_ad, age_group = "adults",
           transpose = FALSE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #3)",
       subtitle = "U: Feature vectors (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
plot_S_fun(svd = svd3_ad) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #3)",
       subtitle = "S: Singular values (by mode)")
```

```{r, fig.width = 3, fig.asp = 1.49}
# unscaled V; use multiply = TRUE (and a "V x S" subtitle) to scale each
# mode by its singular value
plot_V_fun(svd = svd3_ad, age_group = "adults",
           transpose = FALSE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Adults (Approach #3)",
       subtitle = "V: Item vectors (by mode)")
```

### Children, 7-9y

```{r}
# Approach #3, children 7-9y: SVD of the table mean-centered by capacity
# (characters as rows, capacities as columns), keeping nrow(d_79) singular
# vectors per side.
svd3_79 <- svd(x = column_to_rownames(mean_cent_fun(d_79, "capacity",
                                                    age_group = "children"),
                                      "character"),
               nu = nrow(d_79),
               nv = nrow(d_79))
```

```{r, fig.width = 4, fig.asp = 0.67}
# matrix rebuilt from the decomposition
plot_reconstruct_fun(svd = svd3_79, age_group = "children", transpose = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #3)",
       subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S")
```

```{r, fig.width = 3, fig.asp = 1}
# unscaled U; use multiply = TRUE (and an "U x S" subtitle) to scale each
# mode by its singular value
plot_U_fun(svd = svd3_79, age_group = "children",
           transpose = FALSE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #3)",
       subtitle = "U: Feature vectors (by mode)")
```

```{r, fig.width = 3, fig.asp = 1}
plot_S_fun(svd = svd3_79) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #3)",
       subtitle = "S: Singular values (by mode)")
```

```{r, fig.width = 3, fig.asp = 1.49}
# unscaled V; use multiply = TRUE (and a "V x S" subtitle) to scale each
# mode by its singular value
plot_V_fun(svd = svd3_79, age_group = "children",
           transpose = FALSE, multiply = FALSE) +
  labs(x = "", y = "",
       title = "SVD: Children, 7-9y (Approach #3)",
       subtitle = "V: Item vectors (by mode)")
```

### Children, 4-6y

```{r}
svd3_46 <- svd(d_46 %>%
                 mean_cent_fun("capacity", age_group = "children") %>%
                 column_to_rownames("character"),
               nrow(d_46), nrow(d_46))
```

```{r, fig.width = 4, fig.asp = 0.67}
plot_reconstruct_fun(svd3_46, transpose = FALSE, "children") +
  labs(title = "SVD: Children, 4-6y (Approach #3)",
       subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S",
       x = "", y = "")
```

```{r, fig.width = 3, fig.asp = 1}
plot_U_fun(svd3_46, "children", transpose = FALSE, multiply = FALSE) +
  labs(title = "SVD: Children, 4-6y (Approach #3)",
       subtitle = "U: Feature vectors (by mode)",
       # subtitle = "U x S: Feature vectors multiplied by singular value (by mode)",
       x = "", y = "")
```

```{r, fig.width = 3, fig.asp = 1}
# Approach #3, children 4-6y: plot the singular values held in svd3_46.
plot_S_fun(svd3_46) +
  labs(
    x = "",
    y = "",
    title = "SVD: Children, 4-6y (Approach #3)",
    subtitle = "S: Singular values (by mode)"
  )
```

```{r, fig.width = 3, fig.asp = 1.49}
# Approach #3, children 4-6y: plot V from svd3_46 (multiply = FALSE, matching
# the plain "V" subtitle below).
plot_V_fun(svd3_46, "children", transpose = FALSE, multiply = FALSE) +
  labs(
    x = "",
    y = "",
    title = "SVD: Children, 4-6y (Approach #3)",
    # Alternative subtitle kept for reference (presumably for multiply = TRUE):
    # "V x S: Item vectors multiplied by singular value (by mode)"
    subtitle = "V: Item vectors (by mode)"
  )
```


# Approach #4

In Approach #4 we mean-center by character, with capacities as rows and characters as columns.

## Raw data matrix visualizations {.tabset}

### Adults

In this study, 431 US adults rated 21 characters (between-subjects) on 40 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2017, _PNAS_, Study 4.)

```{r, fig.width = 3, fig.asp = 1.49}
# Tile plot of the adult data after mean_cent_fun("character", ...):
# one tile per capacity (y) x character (x), shaded from black (low) to
# white (high). Characters are ordered by character_names_ad.
# NOTE(review): title and legend read "Raw data" / "Mean rating" although the
# fill is the mean_cent column -- confirm the labels are as intended.
d_ad %>%
  mean_cent_fun("character", age_group = "adults") %>%
  gather(key = "character", value = "mean_cent", -capacity) %>%
  mutate(character = factor(character, levels = character_names_ad)) %>%
  ggplot(aes(x = character, y = capacity, fill = mean_cent)) +
  geom_tile(color = "black") +
  scale_fill_gradient(
    low = "black",
    high = "white",
    # fixed colour limits left disabled: limits = c(-3, 3)
    guide = guide_colorbar(barheight = 15)
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    x = "Character",
    y = "Capacity",
    fill = "Mean rating",
    title = "Raw data: Adults",
    subtitle = "21 characters, 40 capacities"
  )
```

### 7-9yo children

In this study, 123 7- to 9-year-old US children rated 9 characters (between-subjects) on 20 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2018, _Proc. CogSci_.)

```{r, fig.width = 3, fig.asp = 1.49}
# Tile plot of the 7-9y data after mean_cent_fun("character", ...):
# one tile per capacity (y) x character (x), shaded from black (low) to
# white (high). Characters are ordered by character_names_ch.
# NOTE(review): title and legend read "Raw data" / "Mean rating" although the
# fill is the mean_cent column -- confirm the labels are as intended.
d_79 %>%
  mean_cent_fun("character", age_group = "children") %>%
  gather(key = "character", value = "mean_cent", -capacity) %>%
  mutate(character = factor(character, levels = character_names_ch)) %>%
  ggplot(aes(x = character, y = capacity, fill = mean_cent)) +
  geom_tile(color = "black") +
  scale_fill_gradient(
    low = "black",
    high = "white",
    # fixed colour limits left disabled: limits = c(-2, 2)
    guide = guide_colorbar(barheight = 15)
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    x = "Character",
    y = "Capacity",
    fill = "Mean rating",
    title = "Raw data: 7- to 9-year-old children",
    subtitle = "9 characters, 20 capacities"
  )
```

### 4-6yo children

In this study, 124 4- to 6-year-old US children rated 9 characters (between-subjects) on 20 capacities (within-subjects). (See Weisman, Dweck, & Markman, 2018, _Proc. CogSci_.)

```{r, fig.width = 3, fig.asp = 1.49}
# Tile plot of the 4-6y data after mean_cent_fun("character", ...):
# one tile per capacity (y) x character (x), shaded from black (low) to
# white (high). Characters are ordered by character_names_ch.
# NOTE(review): title and legend read "Raw data" / "Mean rating" although the
# fill is the mean_cent column -- confirm the labels are as intended.
d_46 %>%
  mean_cent_fun("character", age_group = "children") %>%
  gather(key = "character", value = "mean_cent", -capacity) %>%
  mutate(character = factor(character, levels = character_names_ch)) %>%
  ggplot(aes(x = character, y = capacity, fill = mean_cent)) +
  geom_tile(color = "black") +
  scale_fill_gradient(
    low = "black",
    high = "white",
    # fixed colour limits left disabled: limits = c(-2, 2)
    guide = guide_colorbar(barheight = 15)
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    x = "Character",
    y = "Capacity",
    fill = "Mean rating",
    title = "Raw data: 4- to 6-year-old children",
    subtitle = "9 characters, 20 capacities"
  )
```


## SVD {.tabset}

### Adults

```{r}
# Approach #4 SVD for adults: pass d_ad through
# mean_cent_fun("character", ...), move the "capacity" column into the row
# names, then decompose the result with svd().
# NOTE(review): nu and nv are both nrow(d_ad), i.e. taken from d_ad itself
# rather than from the matrix handed to svd() -- confirm this is intended.
svd4_ad <- d_ad %>%
  mean_cent_fun("character", age_group = "adults") %>%
  column_to_rownames("capacity") %>%
  svd(nu = nrow(d_ad), nv = nrow(d_ad))
```

```{r, fig.width = 3, fig.asp = 1.49}
# Approach #4, adults: plot the dataset as reconstructed from svd4_ad
# (transpose = TRUE for this capacities-as-rows approach).
plot_reconstruct_fun(svd4_ad, transpose = TRUE, "adults") +
  labs(
    x = "",
    y = "",
    title = "SVD: Adults (Approach #4)",
    subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S"
  )
```

```{r, fig.width = 3, fig.asp = 1.49}
# Approach #4, adults: plot U from svd4_ad (multiply = FALSE, matching the
# plain "U" subtitle below).
plot_U_fun(svd4_ad, "adults", transpose = TRUE, multiply = FALSE) +
  labs(
    x = "",
    y = "",
    title = "SVD: Adults (Approach #4)",
    # Alternative subtitle kept for reference (presumably for multiply = TRUE):
    # "U x S: Feature vectors multiplied by singular value (by mode)"
    subtitle = "U: Feature vectors (by mode)"
  )
```

```{r, fig.width = 3, fig.asp = 1}
# Approach #4, adults: plot the singular values held in svd4_ad.
plot_S_fun(svd4_ad) +
  labs(
    x = "",
    y = "",
    title = "SVD: Adults (Approach #4)",
    subtitle = "S: Singular values (by mode)"
  )
```

```{r, fig.width = 3, fig.asp = 1}
# Approach #4, adults: plot V from svd4_ad (multiply = FALSE, matching the
# plain "V" subtitle below).
plot_V_fun(svd4_ad, "adults", transpose = TRUE, multiply = FALSE) +
  labs(
    x = "",
    y = "",
    title = "SVD: Adults (Approach #4)",
    # Alternative subtitle kept for reference (presumably for multiply = TRUE):
    # "V x S: Item vectors multiplied by singular value (by mode)"
    subtitle = "V: Item vectors (by mode)"
  )
```

### Children, 7-9y

```{r}
# Approach #4 SVD for children 7-9y: pass d_79 through
# mean_cent_fun("character", ...), move the "capacity" column into the row
# names, then decompose the result with svd().
# NOTE(review): nu and nv are both nrow(d_79), i.e. taken from d_79 itself
# rather than from the matrix handed to svd() -- confirm this is intended.
svd4_79 <- d_79 %>%
  mean_cent_fun("character", age_group = "children") %>%
  column_to_rownames("capacity") %>%
  svd(nu = nrow(d_79), nv = nrow(d_79))
```

```{r, fig.width = 3, fig.asp = 1.49}
# Approach #4, children 7-9y: plot the dataset as reconstructed from svd4_79
# (transpose = TRUE for this capacities-as-rows approach).
plot_reconstruct_fun(svd4_79, transpose = TRUE, "children") +
  labs(
    x = "",
    y = "",
    title = "SVD: Children, 7-9y (Approach #4)",
    subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S"
  )
```

```{r, fig.width = 3, fig.asp = 1.49}
# Approach #4, children 7-9y: plot U from svd4_79 (multiply = FALSE, matching
# the plain "U" subtitle below).
plot_U_fun(svd4_79, "children", transpose = TRUE, multiply = FALSE) +
  labs(
    x = "",
    y = "",
    title = "SVD: Children, 7-9y (Approach #4)",
    # Alternative subtitle kept for reference (presumably for multiply = TRUE):
    # "U x S: Feature vectors multiplied by singular value (by mode)"
    subtitle = "U: Feature vectors (by mode)"
  )
```

```{r, fig.width = 3, fig.asp = 1}
# Approach #4, children 7-9y: plot the singular values held in svd4_79.
plot_S_fun(svd4_79) +
  labs(
    x = "",
    y = "",
    title = "SVD: Children, 7-9y (Approach #4)",
    subtitle = "S: Singular values (by mode)"
  )
```

```{r, fig.width = 3, fig.asp = 1}
# Approach #4, children 7-9y: plot V from svd4_79 (multiply = FALSE, matching
# the plain "V" subtitle below).
plot_V_fun(svd4_79, "children", transpose = TRUE, multiply = FALSE) +
  labs(
    x = "",
    y = "",
    title = "SVD: Children, 7-9y (Approach #4)",
    # Alternative subtitle kept for reference (presumably for multiply = TRUE):
    # "V x S: Item vectors multiplied by singular value (by mode)"
    subtitle = "V: Item vectors (by mode)"
  )
```


### Children, 4-6y

```{r}
# Approach #4 SVD for children 4-6y: pass d_46 through
# mean_cent_fun("character", ...), move the "capacity" column into the row
# names, then decompose the result with svd().
# NOTE(review): nu and nv are both nrow(d_46), i.e. taken from d_46 itself
# rather than from the matrix handed to svd() -- confirm this is intended.
svd4_46 <- d_46 %>%
  mean_cent_fun("character", age_group = "children") %>%
  column_to_rownames("capacity") %>%
  svd(nu = nrow(d_46), nv = nrow(d_46))
```

```{r, fig.width = 3, fig.asp = 1.49}
# Approach #4, children 4-6y: plot the dataset as reconstructed from svd4_46
# (transpose = TRUE for this capacities-as-rows approach).
plot_reconstruct_fun(svd4_46, transpose = TRUE, "children") +
  labs(
    x = "",
    y = "",
    title = "SVD: Children, 4-6y (Approach #4)",
    subtitle = "Reconstructed dataset: Sum of outer products (U, V) multiplied by S"
  )
```

```{r, fig.width = 3, fig.asp = 1.49}
# Approach #4, children 4-6y: plot U from svd4_46 (multiply = FALSE, matching
# the plain "U" subtitle below).
plot_U_fun(svd4_46, "children", transpose = TRUE, multiply = FALSE) +
  labs(
    x = "",
    y = "",
    title = "SVD: Children, 4-6y (Approach #4)",
    # Alternative subtitle kept for reference (presumably for multiply = TRUE):
    # "U x S: Feature vectors multiplied by singular value (by mode)"
    subtitle = "U: Feature vectors (by mode)"
  )
```

```{r, fig.width = 3, fig.asp = 1}
# Approach #4, children 4-6y: plot the singular values held in svd4_46.
plot_S_fun(svd4_46) +
  labs(
    x = "",
    y = "",
    title = "SVD: Children, 4-6y (Approach #4)",
    subtitle = "S: Singular values (by mode)"
  )
```

```{r, fig.width = 3, fig.asp = 1}
# Approach #4, children 4-6y: plot V from svd4_46 (multiply = FALSE, matching
# the plain "V" subtitle below).
plot_V_fun(svd4_46, "children", transpose = TRUE, multiply = FALSE) +
  labs(
    x = "",
    y = "",
    title = "SVD: Children, 4-6y (Approach #4)",
    # Alternative subtitle kept for reference (presumably for multiply = TRUE):
    # "V x S: Item vectors multiplied by singular value (by mode)"
    subtitle = "V: Item vectors (by mode)"
  )
```

